import os

import scrapy
from bs4 import BeautifulSoup


class Xueshu2Spider(scrapy.Spider):
    """Crawl the news.nau.edu.cn list under ``/5801/`` and archive articles.

    For every entry on a list page the spider yields an item with the
    entry's ``title`` and ``date`` and schedules a request for the linked
    article. Each article's ``div.article`` fragment is prettified and
    written to ``<results_dir>/<name>.html``.
    """

    name = 'xueshu2'
    base = 'https://news.nau.edu.cn'
    allowed_domains = ['news.nau.edu.cn']
    start_urls = ['https://news.nau.edu.cn/5801/list.htm']
    # Directory that receives the saved article HTML files.
    results_dir = 'results'

    def parse(self, response):
        """Parse one list page.

        Yields, per ``li.cols`` entry, a request for the linked article
        (handled by :meth:`parse_page`) and a dict with ``title`` and
        ``date``. Finally yields a request for the next list page when a
        followable "next" link is present.
        """
        for li in response.css('li.cols'):
            href = li.css('span a::attr(href)').get()
            # An entry without a link cannot be followed; the item is still
            # emitted so the list data is not lost.
            if href:
                # response.follow resolves relative hrefs against the page
                # URL, so both relative and absolute links work.
                yield response.follow(href, callback=self.parse_page)
            yield {
                'title': li.css('span a::text').get(),
                'date': li.css('span.cols_meta::text').get(),
            }

        next_page = response.css('li.page_nav a.next::attr(href)').get()
        # The last page either has no "next" anchor at all or points it at a
        # javascript: placeholder; neither can be followed.
        if next_page and not next_page.startswith('javascript:'):
            yield response.follow(next_page, callback=self.parse)

    def parse_page(self, response):
        """Save the article fragment of ``response`` as a prettified file.

        The file is named after the second-to-last path segment of the
        response URL, with an ``.html`` suffix, and is written in UTF-8
        inside ``results_dir``. Pages without a ``div.article`` element are
        skipped with a warning.
        """
        article = response.css('div.article').get()
        if article is None:
            self.logger.warning('No div.article found on %s', response.url)
            return

        pretty = BeautifulSoup(article, features='lxml').prettify()
        file_name = response.url.split('/')[-2] + '.html'
        # Create the output directory on first use instead of failing with
        # FileNotFoundError.
        os.makedirs(self.results_dir, exist_ok=True)
        path = os.path.join(self.results_dir, file_name)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(pretty)
